

import os
import sys

import pandas as pd
import statsmodels.formula.api as smf

# Output directory for the generated table. Pass it as the first command-line
# argument; when no argument is given, fall back to the default Overleaf folder.
try:
    overleaf_path = sys.argv[1]
except IndexError:
    # Only a missing argument triggers the fallback; any other error propagates.
    overleaf_path = './../Apps/Overleaf/bbm/draft/reports/revision/'

#%% LOAD WELL DATA ----------------------------------------------------------------------
# Read the wells table and keep only the first row for each distinct `api` value.
wells_csv = './data_py/temp/06_merge_contracts_wells/wells_merged_no_impute.csv'
df_wells = pd.read_csv(wells_csv, index_col=[0]).drop_duplicates(subset='api')

#%% MERGE WITH GAS PRICE DATA -----------------------------------------------------------
states_csv = './data_py/processed/states_month.csv'
df_states = pd.read_csv(states_csv, index_col=[0])

# Build the same 'YYYY-MM' string key in both frames so they can be joined on it.
month_fmt = '%Y-%m'
df_wells['month'] = pd.to_datetime(df_wells['fixture_date']).dt.strftime(month_fmt)
df_states['month'] = pd.to_datetime(df_states['month']).dt.strftime(month_fmt)

# Left join: every well row is kept, even when its month has no match.
# NOTE(review): the join key is `month` only; if the state table holds more than
# one row per month, wells will be duplicated here -- confirm against the data.
df_wells = pd.merge(df_wells, df_states, on='month', how='left')

#%% CREATE VARS -------------------------------------------------------------------------
# Boom indicator: True when the well's `gas` value is at least the mean of
# df_states['gas'].
# NOTE(review): rows whose `gas` is NaN (e.g. no match in the left merge) compare
# as False and are therefore coded boom=False -- confirm this is intended.
mean_gas = df_states['gas'].mean()
df_wells['boom'] = df_wells['gas'].ge(mean_gas)

# Parse both date columns, then take their difference in whole days.
for date_col in ('spud_date', 'depth_date'):
    df_wells[date_col] = pd.to_datetime(df_wells[date_col])
df_wells['duration'] = df_wells['depth_date'].sub(df_wells['spud_date']).dt.days

# Square and cube of `mri` for the polynomial terms in the regressions.
df_wells['mri_2'] = df_wells['mri'].mul(df_wells['mri'])
df_wells['mri_3'] = df_wells['mri_2'].mul(df_wells['mri'])

#%% DO REGRESSIONS ----------------------------------------------------------------------
# Three OLS specifications of `duration` on the `boom` indicator and a cubic in
# `mri`; each one adds terms to the previous:
#   0: baseline
#   1: + categorical `spec` dummies
#   2: + interactions of `spec` with each `mri` polynomial term
# For every specification the loop stores, keyed by regression number, the
# values substituted into the LaTeX template: r_<i>, n_<i>, coef_<i>, se_<i>.
formulas_by_reg = dict()
summary_to_tex = dict()

# Set formulas
formulas_by_reg[0] = 'duration ~ boom + mri + mri_2 + mri_3'
formulas_by_reg[1] = 'duration ~ boom + mri + mri_2 + mri_3 + C(spec)'
formulas_by_reg[2] = 'duration ~ boom + mri + mri_2 + mri_3 + C(spec) ' \
                     '+ C(spec):mri + C(spec):mri_2 + C(spec):mri_3'

# Label the formula interface gives to the coefficient on the boolean `boom` column
boom_term = 'boom[T.True]'

# Do regressions
for i in [0, 1, 2]:
    reg = smf.ols(
        formula=formulas_by_reg[i],
        data=df_wells
    ).fit(cov_type='HC0')
    p_value = reg.pvalues[boom_term]

    # Significance stars for the table: *** p<0.01, ** p<0.05, * p<0.1
    if p_value < 0.01:
        stars = "***"
    elif p_value < 0.05:
        stars = "**"
    elif p_value < 0.1:
        stars = "*"
    else:
        stars = ""

    summary_to_tex[f"r_{i}"] = round(reg.rsquared, 2)
    summary_to_tex[f"n_{i}"] = int(reg.nobs)
    # Stored as a string so the stars can be appended; the previous code added a
    # str to a float and looked the entry up under a malformed key.
    summary_to_tex[f"coef_{i}"] = f"{round(reg.params[boom_term], 2)}{stars}"
    summary_to_tex[f"se_{i}"] = round(reg.HC0_se[boom_term], 2)

#%% PRODUCE THE TABLE -------------------------------------------------------------------
# Read the LaTeX template and fill its {placeholders} from summary_to_tex.
with open('./src/tex/table_duration.tex', 'r') as f:
    tex = f.read()
output = tex.format(**summary_to_tex)

# Join the path components instead of concatenating strings, so an output
# directory given without a trailing slash still resolves to
# <overleaf_path>/tables/table_duration.tex.
table_path = os.path.join(overleaf_path, 'tables', 'table_duration.tex')
with open(table_path, 'w') as f:
    f.write(output)
